# -*- coding: utf-8 -*-
# @Time    : 2025/3/14 下午8:14
# @Author  : yqk
# @File    : study_bs4.py
# @Software: PyCharm
import re

from bs4 import BeautifulSoup

# Load the saved page in binary mode ('rb') and hand the raw bytes to the parser.
# The context manager closes the file handle as soon as the read finishes;
# previously the handle returned by open() was never closed.
with open('./baidu.html', 'rb') as file:
    html = file.read()

# Build the parse tree used by all of the examples below.
bs = BeautifulSoup(html, 'html.parser')
# 1.tag
# print(bs.title)
# print(bs.head)
# print(bs.a)

#2.内容

# print(bs.title.string)

#3.整个文档
# print(bs)

#文档遍历
# print(bs.head.contents)
# print(bs.head.contents[1])
#



# 4.搜索文档
# print(bs.find_all('p'))
# t = bs.find_all("div")
# print(t)


# Regex filter: find_all() uses the pattern's search() to match tag names
# import re
# t = bs.find_all(re.compile("t"))
# print(t)


# t = bs.find_all(id="head")
# for i in t:
#     print(i)

# t = bs.find_all(class_=True)
# for i in t:
#     print(i)
#
# t = bs.find_all(href= True)
# for i in t:
#     print(i)

# t = bs.find_all(string = "百度")
# for i in t:
#     print(i)

# import re
#
# # 使用 string 参数来查找包含数字的文本，并避免弃用警告
# t = bs.find_all(string=re.compile(r"\d"))
#
# # 输出匹配的内容
# for i in t:
#     print(i)


# 选择器
# t = bs.select('title')
# print(t)
# t = bs.select('.bd')


